# OPTIONAL: Load the "autoreload" extension so edited project code is reloaded automatically, but exclude large third-party modules from reloading
%load_ext autoreload
%autoreload 2
%aimport -pandas
%aimport -torch
%aimport -numpy
%aimport -matplotlib
%aimport -dask
%aimport -tqdm
%matplotlib inline
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.auto import tqdm
from IPython.display import display, HTML
import warnings
# Show each distinct warning only once by default.
warnings.simplefilter('once')
# simplefilter() inserts at the front of the filter list, so these two later
# 'ignore' rules take precedence over 'once' for their categories.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
import holoviews as hv
from holoviews import opts
from holoviews.operation.datashader import datashade, dynspread
# Select bokeh as the HoloViews plotting backend for this notebook.
hv.extension('bokeh', inline=True)
from seq2seq_time.visualization.hv_ggplot import ggplot_theme
# Style bokeh output with the theme object imported from the project's hv_ggplot module.
hv.renderer('bokeh').theme = ggplot_theme
# Turn on automatic capture so the hv.archive.export() call at the end of the
# notebook has something to write out.
hv.archive.auto()
# HoloViews plot options for (datashaded) time-series plots
%opts RGB [width=800 height=200 show_grid=True active_tools=["xwheel_zoom"] default_tools=["xpan","xwheel_zoom", "reset", "hover"] toolbar="right"]
%opts Curve [width=800 height=200 show_grid=True active_tools=["xwheel_zoom"] default_tools=["xpan","xwheel_zoom", "reset", "hover"] toolbar="right"]
%opts Scatter [width=800 height=200 show_grid=True active_tools=["xwheel_zoom"] default_tools=["xpan","xwheel_zoom", "reset", "hover"] toolbar="right"]
%opts Layout [width=800 height=200]
Automatic capture is now enabled. [2020-11-02 07:46:01]
# # device = "cuda" if torch.cuda.is_available() else "cpu"
# print(f'using {device}')
# Window lengths forwarded to `to_datasets(...)` by `plot_batches_y`
# (presumably counted in rows / time steps of each dataset — TODO confirm).
window_past = 48*2
window_future = 48
# NOTE(review): batch_size is not referenced in the visible part of this notebook.
batch_size = 4
# Directory handed to every dataset class constructor below.
datasets_root = Path('../data/processed/')
from seq2seq_time.data.data import IMOSCurrentsVel, AppliancesEnergyPrediction, BejingPM25, GasSensor, MetroInterstateTraffic
# Dataset classes (not instances); the loops below instantiate each one as
# `dataset(datasets_root)`.
datasets = [IMOSCurrentsVel, BejingPM25, GasSensor, AppliancesEnergyPrediction, MetroInterstateTraffic, ]
# Bare expression so the notebook echoes the list as the cell output.
datasets
[seq2seq_time.data.data.IMOSCurrentsVel, seq2seq_time.data.data.BejingPM25, seq2seq_time.data.data.GasSensor, seq2seq_time.data.data.AppliancesEnergyPrediction, seq2seq_time.data.data.MetroInterstateTraffic]
# plot a batch
def plot_batch_y(ds, i):
    """Scatter-plot the target values of window ``i`` with a red marker at "now".

    ``ds.get_rows(i)`` is unpacked into four items; only the past and future
    targets (second and fourth) are plotted. They are concatenated with
    ``pd.concat`` and the vertical line sits at the last index value of the
    past targets, i.e. the boundary between history and forecast horizon.

    Args:
        ds: Object exposing ``get_rows(i)``.
        i: Index of the window to plot.

    Returns:
        A HoloViews overlay of the scatter plot and the "now" line.
    """
    # The input features (first and third items) are not needed for this plot.
    _, past_targets, _, future_targets = ds.get_rows(i)
    targets = pd.concat([past_targets, future_targets])
    scatter = hv.Scatter(targets)
    split_time = past_targets.index[-1]
    now_line = hv.VLine(split_time).relabel('now').opts(color='red')
    return scatter * now_line
def plot_batches_y(dataset, window_past=window_past, window_future=window_future):
    """Plot target windows sampled from the train, val and test splits of a dataset.

    Four window indices are used: three evenly spaced between 0 and
    ``max_i - 10`` (where ``max_i`` is the length of the shortest split, so
    the same index is valid for all three) plus ``-1`` for the final window.

    Args:
        dataset: Dataset instance providing
            ``to_datasets(window_past=..., window_future=...)`` (returning the
            train, val and test splits) and a ``df`` whose index has a ``freq``.
        window_past: Forwarded to ``dataset.to_datasets``.
        window_future: Forwarded to ``dataset.to_datasets``.

    Returns:
        A ``hv.Layout`` laid out in 3 columns (train, val, test), one row per
        sampled index, titled with the dataset class name and index frequency.
    """
    ds_name = type(dataset).__name__
    # Named `plot_opts` so the `opts` imported from holoviews is not shadowed.
    plot_opts = dict(width=200, height=100, xaxis=None, yaxis=None)
    # Everything is read from the `dataset` argument rather than a
    # notebook-level variable, so the function works for any dataset passed in.
    ds_train, ds_val, ds_test = dataset.to_datasets(window_past=window_past,
                                                    window_future=window_future)
    n = 4
    max_i = min(len(ds_train), len(ds_val), len(ds_test))
    # n-1 evenly spaced indices shared by all splits, plus the last window.
    ii = list(np.linspace(0, max_i-10, n-1).astype(int)) + [-1]
    l = hv.Layout()
    for i in ii:
        l += plot_batch_y(ds_train, i).opts(title=f'train {i}', **plot_opts)
        l += plot_batch_y(ds_val, i).opts(title=f'val {i}', **plot_opts)
        l += plot_batch_y(ds_test, i).opts(title=f'test {i}', **plot_opts)
    title = f"{ds_name} freq={dataset.df.index.freq.freqstr}"
    return l.opts(shared_axes=False, toolbar='right', title=title).cols(3)
# Summarise every dataset: heading, docstring, size/frequency, column roles
# and the underlying dataframe.
for dataset in datasets:
    d = dataset(datasets_root)
    display(HTML(f"<h3>{dataset.__name__}</h3>"))
    print(d.__doc__)
    print(f'{len(d)} rows at freq{d.df.index.freq.freqstr}')
    print('columns_forecast', d.columns_forecast)
    print('columns_past', d.columns_past)
    print('columns_target', d.columns_target)
    # Blank line before the dataframe; a bare `print` without parentheses
    # is a no-op expression in Python 3.
    print()
    display(d.df)
Current Speed at Two Rocks, Western Australia, with a water depth of 200 m. The mooring is located at Lat -31.719 Lon 115.03. Has tidal periods as features.
see:
- https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/bbfc20d3-0e98-40a8-bd8a-3f7717eafb6d
- http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/
and http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html
And https://en.wikipedia.org/wiki/Theory_of_tides
40708 rows at freq30T
columns_forecast ['M2', 'S2', 'N2', 'K2', 'K1', 'O1', 'P1', 'Q1', 'M4', 'M6', 'S4', 'MK3', 'MM', 'SSA', 'SA']
columns_past {'UCUR', 'DEPTH', 'WCUR', 'VCUR', 'TEMP'}
columns_target ['SPD']
| VCUR | UCUR | WCUR | TEMP | DEPTH | M2 | S2 | N2 | K2 | K1 | ... | P1 | Q1 | M4 | M6 | S4 | MK3 | MM | SSA | SA | SPD | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TIME | |||||||||||||||||||||
| 2011-06-09 15:30:00 | 0.204021 | -0.158203 | -0.001695 | 20.830000 | 201.967545 | 0.104076 | -0.422618 | 0.283656 | 0.306084 | 0.583774 | ... | 0.344861 | -0.656472 | -0.986246 | -0.310180 | -0.642788 | -0.508251 | 0.997041 | -0.908848 | 0.213485 | 0.258172 |
| 2011-06-09 16:00:00 | 0.105118 | -0.094232 | -0.008524 | 20.863333 | 201.973831 | 0.269541 | -0.572121 | 0.437399 | 0.464780 | 0.510936 | ... | 0.261805 | -0.596573 | -0.853767 | -0.722387 | -0.338557 | -0.267432 | 0.997619 | -0.909047 | 0.213252 | 0.142034 |
| 2011-06-09 16:30:00 | 0.121471 | -0.002748 | -0.008752 | 20.940001 | 201.987396 | 0.502359 | -0.764101 | 0.645351 | 0.675963 | 0.395088 | ... | 0.134032 | -0.500549 | -0.496055 | -0.990386 | 0.171889 | 0.117156 | 0.998467 | -0.909346 | 0.212902 | 0.133557 |
| 2011-06-09 17:00:00 | 0.143786 | -0.118950 | 0.004436 | 21.020000 | 201.993332 | 0.703208 | -0.904008 | 0.813757 | 0.840830 | 0.272444 | ... | 0.003978 | -0.397689 | -0.014085 | -0.714982 | 0.636277 | 0.484663 | 0.999293 | -0.909644 | 0.212551 | 0.186944 |
| 2011-06-09 17:30:00 | 0.068450 | -0.074994 | -0.011574 | 21.129999 | 202.001282 | 0.859309 | -0.982309 | 0.932296 | 0.948083 | 0.145112 | ... | -0.126143 | -0.289399 | 0.471412 | -0.047283 | 0.930175 | 0.781508 | 1.000096 | -0.909941 | 0.212201 | 0.102691 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2014-05-16 01:30:00 | -0.428396 | 0.090252 | -0.013913 | 15.676667 | 201.782959 | 0.990709 | 0.641816 | 0.485959 | -0.987066 | -0.992151 | ... | -0.480382 | -0.502142 | 0.955280 | 0.895072 | -0.170206 | -0.105201 | 0.616808 | -0.299223 | 0.591936 | 0.438596 |
| 2014-05-16 02:00:00 | -0.492208 | 0.106152 | -0.014862 | 15.690000 | 201.785599 | 0.922364 | 0.422326 | 0.255914 | -0.975840 | -0.989219 | ... | -0.362221 | -0.399381 | 0.695027 | 0.357815 | -0.634967 | -0.474076 | 0.620623 | -0.299906 | 0.591647 | 0.503897 |
| 2014-05-16 02:30:00 | -0.519550 | 0.110623 | -0.004837 | 15.713333 | 201.775696 | 0.795324 | 0.174055 | 0.010186 | -0.897749 | -0.969268 | ... | -0.237896 | -0.291166 | 0.260675 | -0.375769 | -0.929589 | -0.773832 | 0.624425 | -0.300590 | 0.591359 | 0.531268 |
| 2014-05-16 03:00:00 | -0.508658 | 0.097735 | -0.005429 | 15.770000 | 201.748566 | 0.617673 | -0.086077 | -0.236166 | -0.758145 | -0.932642 | ... | -0.109523 | -0.178975 | -0.238974 | -0.903175 | -0.975129 | -0.960765 | 0.628213 | -0.301274 | 0.591069 | 0.517970 |
| 2014-05-16 03:30:00 | -0.526861 | 0.112729 | -0.001550 | 15.810000 | 201.721451 | 0.477724 | -0.257985 | -0.394165 | -0.637094 | -0.899834 | ... | -0.022783 | -0.102779 | -0.551476 | -1.008378 | -0.866887 | -1.013969 | 0.630731 | -0.301729 | 0.590877 | 0.538786 |
51433 rows × 21 columns
PM2.5 data of US Embassy in Beijing. This measures smoke as well as some pollen, fog, and dust particles of a certain size. Weather data from a nearby airport are included.
See:
- http://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
- https://en.wikipedia.org/wiki/Particulates
41757 rows at freqH
columns_forecast ['month', 'day', 'week', 'hour', 'minute', 'dayofweek']
columns_past {'cbwd', 'Is', 'Iws', 'PRES', 'TEMP', 'DEWP', 'Ir'}
columns_target ['log_pm2.5']
| DEWP | TEMP | PRES | cbwd | Iws | Is | Ir | log_pm2.5 | month | day | week | hour | minute | dayofweek | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2010-01-02 00:00:00+08:00 | -16.0 | -4.0 | 1020.0 | SE | 1.79 | 0.0 | 0.0 | 4.859812 | 1 | 2 | 53 | 0 | 0 | 5 |
| 2010-01-02 01:00:00+08:00 | -15.0 | -4.0 | 1020.0 | SE | 2.68 | 0.0 | 0.0 | 4.997212 | 1 | 2 | 53 | 1 | 0 | 5 |
| 2010-01-02 02:00:00+08:00 | -11.0 | -5.0 | 1021.0 | SE | 3.57 | 0.0 | 0.0 | 5.068904 | 1 | 2 | 53 | 2 | 0 | 5 |
| 2010-01-02 03:00:00+08:00 | -7.0 | -5.0 | 1022.0 | SE | 5.36 | 1.0 | 0.0 | 5.198497 | 1 | 2 | 53 | 3 | 0 | 5 |
| 2010-01-02 04:00:00+08:00 | -7.0 | -5.0 | 1022.0 | SE | 6.25 | 2.0 | 0.0 | 4.927254 | 1 | 2 | 53 | 4 | 0 | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2014-12-31 19:00:00+08:00 | -23.0 | -2.0 | 1034.0 | NW | 231.97 | 0.0 | 0.0 | 2.079443 | 12 | 31 | 1 | 19 | 0 | 2 |
| 2014-12-31 20:00:00+08:00 | -22.0 | -3.0 | 1034.0 | NW | 237.78 | 0.0 | 0.0 | 2.302586 | 12 | 31 | 1 | 20 | 0 | 2 |
| 2014-12-31 21:00:00+08:00 | -22.0 | -3.0 | 1034.0 | NW | 242.70 | 0.0 | 0.0 | 2.302586 | 12 | 31 | 1 | 21 | 0 | 2 |
| 2014-12-31 22:00:00+08:00 | -22.0 | -4.0 | 1034.0 | NW | 246.72 | 0.0 | 0.0 | 2.079443 | 12 | 31 | 1 | 22 | 0 | 2 |
| 2014-12-31 23:00:00+08:00 | -21.0 | -3.0 | 1034.0 | NW | 249.85 | 0.0 | 0.0 | 2.484907 | 12 | 31 | 1 | 23 | 0 | 2 |
43800 rows × 14 columns
A metal oxide (MOX) gas sensor exposed during 3 weeks to mixtures of carbon monoxide and humid synthetic air in a gas chamber.
See: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+temperature+modulation
295653 rows at freq300L
columns_forecast ['Flow rate (mL/min)', 'Heater voltage (V)']
columns_past {'Temperature (C)', 'CO (ppm)', 'Humidity (%r.h.)'}
columns_target ['R1 (MOhm)']
| CO (ppm) | Humidity (%r.h.) | Temperature (C) | Flow rate (mL/min) | Heater voltage (V) | R1 (MOhm) | |
|---|---|---|---|---|---|---|
| Time (s) | ||||||
| 2016-10-04 10:41:24.000 | 0.0 | 54.6258 | 25.3178 | 242.5724 | 0.2030 | 55.1483 |
| 2016-10-04 10:41:24.300 | 0.0 | 52.6300 | 25.3000 | 241.5326 | 0.2020 | 70.7619 |
| 2016-10-04 10:41:24.600 | 0.0 | 52.6300 | 25.3000 | 241.2315 | 0.2020 | 68.5571 |
| 2016-10-04 10:41:24.900 | 0.0 | 52.6300 | 25.3000 | 240.9315 | 0.2010 | 69.1448 |
| 2016-10-04 10:41:25.200 | 0.0 | 52.6300 | 25.3000 | 240.6521 | 0.2007 | 61.4100 |
| ... | ... | ... | ... | ... | ... | ... |
| 2016-10-05 11:56:32.400 | 0.0 | 63.0900 | 25.3800 | 0.0000 | 0.2000 | 4.0125 |
| 2016-10-05 11:56:32.700 | 0.0 | 63.0900 | 25.3800 | 0.0000 | 0.2000 | 3.3697 |
| 2016-10-05 11:56:33.000 | 0.0 | 63.0900 | 25.3800 | 0.0000 | 0.2000 | 2.8750 |
| 2016-10-05 11:56:33.300 | 0.0 | 63.0900 | 25.3800 | 0.0000 | 0.2000 | 2.4623 |
| 2016-10-05 11:56:33.600 | 0.0 | 63.0900 | 25.3800 | 0.0000 | 0.2000 | 2.1432 |
303033 rows × 6 columns
Appliances energy use in a low energy building.
See: https://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
19735 rows at freq10T
columns_forecast ['month', 'day', 'week', 'hour', 'minute', 'dayofweek']
columns_past {'RH_3', 'T9', 'T5', 'T2', 'Tdewpoint', 'T8', 'lights', 'rv2', 'RH_out', 'T1', 'Visibility', 'RH_6', 'T7', 'T_out', 'RH_8', 'Windspeed', 'T3', 'RH_9', 'T4', 'RH_4', 'RH_5', 'RH_2', 'RH_7', 'RH_1', 'rv1', 'T6', 'Press_mm_hg'}
columns_target ['log_Appliances']
| lights | T1 | RH_1 | T2 | RH_2 | T3 | RH_3 | T4 | RH_4 | T5 | ... | Tdewpoint | rv1 | rv2 | log_Appliances | month | day | week | hour | minute | dayofweek | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date | |||||||||||||||||||||
| 2016-01-11 17:00:00 | 30 | 19.890000 | 47.596667 | 19.200000 | 44.790000 | 19.790000 | 44.730000 | 19.000000 | 45.566667 | 17.166667 | ... | 5.300000 | 13.275433 | 13.275433 | 4.094345 | 1 | 11 | 2 | 17 | 0 | 0 |
| 2016-01-11 17:10:00 | 30 | 19.890000 | 46.693333 | 19.200000 | 44.722500 | 19.790000 | 44.790000 | 19.000000 | 45.992500 | 17.166667 | ... | 5.200000 | 18.606195 | 18.606195 | 4.094345 | 1 | 11 | 2 | 17 | 10 | 0 |
| 2016-01-11 17:20:00 | 30 | 19.890000 | 46.300000 | 19.200000 | 44.626667 | 19.790000 | 44.933333 | 18.926667 | 45.890000 | 17.166667 | ... | 5.100000 | 28.642668 | 28.642668 | 3.912023 | 1 | 11 | 2 | 17 | 20 | 0 |
| 2016-01-11 17:30:00 | 40 | 19.890000 | 46.066667 | 19.200000 | 44.590000 | 19.790000 | 45.000000 | 18.890000 | 45.723333 | 17.166667 | ... | 5.000000 | 45.410389 | 45.410389 | 3.912023 | 1 | 11 | 2 | 17 | 30 | 0 |
| 2016-01-11 17:40:00 | 40 | 19.890000 | 46.333333 | 19.200000 | 44.530000 | 19.790000 | 45.000000 | 18.890000 | 45.530000 | 17.200000 | ... | 4.900000 | 10.084097 | 10.084097 | 4.094345 | 1 | 11 | 2 | 17 | 40 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2016-05-27 17:20:00 | 0 | 25.566667 | 46.560000 | 25.890000 | 42.025714 | 27.200000 | 41.163333 | 24.700000 | 45.590000 | 23.200000 | ... | 13.333333 | 43.096812 | 43.096812 | 4.605170 | 5 | 27 | 21 | 17 | 20 | 4 |
| 2016-05-27 17:30:00 | 0 | 25.500000 | 46.500000 | 25.754000 | 42.080000 | 27.133333 | 41.223333 | 24.700000 | 45.590000 | 23.230000 | ... | 13.300000 | 49.282940 | 49.282940 | 4.499810 | 5 | 27 | 21 | 17 | 30 | 4 |
| 2016-05-27 17:40:00 | 10 | 25.500000 | 46.596667 | 25.628571 | 42.768571 | 27.050000 | 41.690000 | 24.700000 | 45.730000 | 23.230000 | ... | 13.266667 | 29.199117 | 29.199117 | 5.598422 | 5 | 27 | 21 | 17 | 40 | 4 |
| 2016-05-27 17:50:00 | 10 | 25.500000 | 46.990000 | 25.414000 | 43.036000 | 26.890000 | 41.290000 | 24.700000 | 45.790000 | 23.200000 | ... | 13.233333 | 6.322784 | 6.322784 | 6.040255 | 5 | 27 | 21 | 17 | 50 | 4 |
| 2016-05-27 18:00:00 | 10 | 25.500000 | 46.600000 | 25.264286 | 42.971429 | 26.823333 | 41.156667 | 24.700000 | 45.963333 | 23.200000 | ... | 13.200000 | 34.118851 | 34.118851 | 6.063785 | 5 | 27 | 21 | 18 | 0 | 4 |
19735 rows × 34 columns
Hourly traffic volume for Interstate 94 (I-94) in the U.S. state of Minnesota. Includes weather and holiday features from 2012-2018.
See: https://archive.ics.uci.edu/ml/datasets/Metro+Interstate+Traffic+Volume
40575 rows at freqH
columns_forecast ['holiday', 'month', 'day', 'week', 'hour', 'minute', 'dayofweek']
columns_past {'temp', 'snow_1h', 'weather_main', 'rain_1h', 'clouds_all', 'weather_description'}
columns_target ['traffic_volume']
| holiday | temp | rain_1h | snow_1h | clouds_all | weather_main | weather_description | traffic_volume | month | day | week | hour | minute | dayofweek | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| date_time | ||||||||||||||
| 2012-10-02 09:00:00 | True | 288.28 | 0.0 | 0.0 | 40.0 | Clouds | scattered clouds | 5545.0 | 10 | 2 | 40 | 9 | 0 | 1 |
| 2012-10-02 10:00:00 | True | 289.36 | 0.0 | 0.0 | 75.0 | Clouds | broken clouds | 4516.0 | 10 | 2 | 40 | 10 | 0 | 1 |
| 2012-10-02 11:00:00 | True | 289.58 | 0.0 | 0.0 | 90.0 | Clouds | overcast clouds | 4767.0 | 10 | 2 | 40 | 11 | 0 | 1 |
| 2012-10-02 12:00:00 | True | 290.13 | 0.0 | 0.0 | 90.0 | Clouds | overcast clouds | 5026.0 | 10 | 2 | 40 | 12 | 0 | 1 |
| 2012-10-02 13:00:00 | True | 291.14 | 0.0 | 0.0 | 75.0 | Clouds | broken clouds | 4918.0 | 10 | 2 | 40 | 13 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2018-09-30 19:00:00 | True | 283.45 | 0.0 | 0.0 | 75.0 | Clouds | broken clouds | 3543.0 | 9 | 30 | 39 | 19 | 0 | 6 |
| 2018-09-30 20:00:00 | True | 282.76 | 0.0 | 0.0 | 90.0 | Clouds | overcast clouds | 2781.0 | 9 | 30 | 39 | 20 | 0 | 6 |
| 2018-09-30 21:00:00 | True | 282.73 | 0.0 | 0.0 | 90.0 | Thunderstorm | proximity thunderstorm | 2159.0 | 9 | 30 | 39 | 21 | 0 | 6 |
| 2018-09-30 22:00:00 | True | 282.09 | 0.0 | 0.0 | 90.0 | Clouds | overcast clouds | 1450.0 | 9 | 30 | 39 | 22 | 0 | 6 |
| 2018-09-30 23:00:00 | True | 282.12 | 0.0 | 0.0 | 90.0 | Clouds | overcast clouds | 954.0 | 9 | 30 | 39 | 23 | 0 | 6 |
52551 rows × 14 columns
# View train, test, val splits
# For every dataset class: instantiate it, print its repr and show sample
# target windows from the train/val/test splits.
for dataset in datasets:
    d = dataset(datasets_root)
    print(d)
    display(plot_batches_y(d))
<IMOSCurrentsVel (51433, 21)>
<BejingPM25 (43800, 14)>
<GasSensor (303033, 6)>
<AppliancesEnergyPrediction (19735, 34)>
<MetroInterstateTraffic (52551, 14)>
# def plot_batch_x(ds, i):
# """Plot input features"""
# x_past, y_past, x_future, y_future = ds.get_rows(i)
# x = pd.concat([x_past, x_future])
# p = hv.NdOverlay({
# col: hv.Curve(x[col]) for col in x.columns
# }, kdims='column')
# now = y_past.index[-1]
# p *= hv.VLine(now).relabel('now').opts(color='red')
# return p
# def plot_batches_x(d):
# """Plot input features for multiple batch"""
# ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
# window_future=window_future)
# l = plot_batch_x(ds_train, 10) + plot_batch_x(ds_val, 10) + plot_batch_x(ds_test, 10)
# l = l.cols(1).opts(shared_axes=False, title=f'{type(d).__name__}')
# return l
# ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
# window_future=window_future)
# # View input columns
# for dataset in datasets:
# d = dataset(datasets_root)
# display(plot_batches_x(d))
# Write out what was captured since hv.archive.auto() was enabled at the top
# of the notebook, then report the status of that export as the cell output.
hv.archive.export()
hv.archive.last_export_status()
Export name: '01.0-mc-datasets' Directory '/media/wassname/Storage5/projects2/3ST/seq2seq-time/notebooks' If no output appears, please check holoviews.archive.last_export_status()